%matplotlib inline
import visdom
import numpy as np
import chart_studio.plotly as py
import plotly.express as px
import plotly.tools as tls
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
from datetime import datetime
import requests
import json
import matplotlib.pyplot as plt
import sys
sys.path.append("../TimeSeriesTools")
import mongodb_utils
# MongoDB connection settings; the port is handed to mongodb_utils as a string.
db_host= 'localhost'
port = '28018'
db_name='TimeSeriesBench'
mongodb_client = mongodb_utils.mongodb_connect(db_host, port)
import kairosdb_utils
# NOTE(review): `global` at module scope has no effect; the assignment below
# already creates the module-level name.
global kairosdb_server
kairosdb_server = "http://localhost:6060"
import influxdb_utils
# InfluxDB connection settings. These rebind db_host, port and db_name, so
# from here on `port` holds the InfluxDB port, not the MongoDB one.
db_host= 'localhost'
port = '9086'
db_name='TimeSeriesBench'
influxdb_client = influxdb_utils.influxdb_connect(db_host, port)
import warp10_utils
# NOTE(review): same no-op `global` as for kairosdb_server above.
global warp10_server
warp10_server = "http://localhost:7070"
def get_collection_scheme(db_name,scheme_name):
    """Look up scheme documents by name in the 'schemes' collection.

    Uses the module-level `mongodb_client`. The result of `find` for the
    filter {"name": scheme_name} is returned unmodified.
    """
    name_filter = {"name": scheme_name}
    return mongodb_client[db_name]['schemes'].find(name_filter)
def mongodb_find_all_data(db_name,coll_name,scheme):
    """Return `mongodb_utils.get_all_data` for the given collection.

    The module-level `mongodb_client` is passed as the connection; the
    remaining arguments are forwarded unchanged.
    """
    return mongodb_utils.get_all_data(mongodb_client, db_name, coll_name, scheme)
def mongodb_find_data_select_by_tags(db_name,coll_name,tags,scheme):
    """Return `mongodb_utils.get_data_select_by_tags` for the given collection.

    The module-level `mongodb_client` is passed as the connection; `tags`
    and the remaining arguments are forwarded unchanged.
    """
    return mongodb_utils.get_data_select_by_tags(
        mongodb_client, db_name, coll_name, tags, scheme
    )
def kairosdb_find_all_data(db_name,coll_name,scheme):
    """Return `kairosdb_utils.get_all_data` for the given collection.

    The module-level `kairosdb_server` URL is passed as the first argument;
    the remaining arguments are forwarded unchanged.
    """
    return kairosdb_utils.get_all_data(kairosdb_server, db_name, coll_name, scheme)
def influxdb_find_all_data(db_name,coll_name,scheme):
    """Return `influxdb_utils.get_all_data` for the given collection.

    The module-level `influxdb_client` is passed as the connection; the
    remaining arguments are forwarded unchanged.
    """
    return influxdb_utils.get_all_data(influxdb_client, db_name, coll_name, scheme)
def warp10_find_all_data(db_name,coll_name,scheme):
    """Return `warp10_utils.get_all_data` for the given collection.

    The module-level `warp10_server` URL is passed as the first argument.
    `scheme` is accepted, matching the other `*_find_all_data` wrappers,
    but is not forwarded to `warp10_utils`.
    """
    return warp10_utils.get_all_data(warp10_server, db_name, coll_name)
def clean_data(scheme,data):
    """Remove, in place, every item of `data` that fails validation.

    Each item is validated with a cerberus `Validator` built from `scheme`.
    Invalid items are reported on stdout with their position in the
    original list and the validator's error details, then dropped.

    Parameters:
        scheme: validation schema handed to `cerberus.Validator`.
        data: list of items to validate; mutated in place.

    Returns:
        None.
    """
    from cerberus import Validator
    validator = Validator(scheme)
    valid_items = []
    for index, item in enumerate(data):
        if validator.validate(item):
            valid_items.append(item)
        else:
            print("corrupt data in line :",index,", error : ",validator.errors)
    # Deleting from `data` while enumerating it would skip the element that
    # follows each removed one, so collect the survivors and assign them
    # back in one step; callers still see the same list object.
    data[:] = valid_items
def str_to_unix(date):
    """Convert a 'dd/mm/YYYY HH:MM:SS' string to integer milliseconds.

    The string is parsed as a naive datetime and measured against
    1970-01-01 00:00:00; no timezone conversion is applied. The elapsed
    seconds are truncated to an integer before scaling to milliseconds.
    """
    parsed = datetime.strptime(date, '%d/%m/%Y %H:%M:%S')
    elapsed = parsed - datetime(1970, 1, 1)
    whole_seconds = int(elapsed.total_seconds())
    return whole_seconds * 1000
def to_unix_time(dt, offset_seconds=7200):
    """Return milliseconds elapsed between a shifted epoch and `dt`.

    The reference instant is 1970-01-01 00:00:00 plus `offset_seconds`.
    The default of 7200 keeps the original hard-coded two-hour shift.

    Parameters:
        dt: naive `datetime` to convert.
        offset_seconds: seconds added to 1970-01-01 00:00:00 to obtain the
            reference instant. NOTE(review): the 7200 default looks like a
            UTC+2 local-time correction -- confirm with the data source.

    Returns:
        Elapsed time in milliseconds, as a float.
    """
    # Function-scope import: the file only imports `datetime` from the module.
    from datetime import timedelta
    # Built explicitly rather than via datetime.utcfromtimestamp, which is
    # deprecated since Python 3.12; the resulting naive datetime is the same.
    epoch = datetime(1970, 1, 1) + timedelta(seconds=offset_seconds)
    return (dt - epoch).total_seconds() * 1000
def str_to_timestamp(date):
    """Convert a 'dd/mm/YYYY HH:MM:SS' string to whole seconds since 2019-01-01.

    The string is parsed as a naive datetime and measured against
    2019-01-01 00:00:00; the result is truncated to an integer.
    """
    parsed = datetime.strptime(date, '%d/%m/%Y %H:%M:%S')
    elapsed = parsed - datetime(2019, 1, 1)
    return int(elapsed.total_seconds())
def type_convert(df):
    """Convert every column of `df` except the first to numeric, in place.

    Each column after the first is replaced by `pd.to_numeric` of itself.
    The first column is left untouched and nothing is returned.
    """
    for column in df.columns[1:]:
        df[column] = pd.to_numeric(df[column])
# Fetch the 'SmartGrid' scheme; scheme[0]['value'] is what the loaders and
# the validator below receive.
scheme = get_collection_scheme(db_name,'SmartGrid')
scheme[0]['value']
%%time
# Load every document of the collection from MongoDB.
coll_name='SmartGridCryolite20190101OneMonthBS10000d'
data = mongodb_find_all_data(db_name,coll_name,scheme[0]['value'])
print("number of docs",len(data))
%%time
# Load only the MongoDB documents matching the given tags.
# NOTE(review): the tag key is spelled 'Buiding' -- confirm it matches the
# key actually stored in the collection.
coll_name='SmartGridCryolite20190101OneMonthBS10000d'
tags = { 'Buiding' : 'CRY', 'Device' : 'CENTRALE_SOLAIRE', 'Measure' : 'CRY_act_prod_pow' }
data = mongodb_find_data_select_by_tags(db_name,coll_name,tags,scheme[0]['value'])
print("number of docs",len(data))
%%time
# Same load from KairosDB (different collection name).
coll_name='SmartGridCryolite20190101OneMonthV10000'
data = kairosdb_find_all_data(db_name,coll_name,scheme[0]['value'])
print("number of docs",len(data))
%%time
# Same load from InfluxDB.
coll_name='SmartGridCryolite20190101OneMonthBS10000d'
data = influxdb_find_all_data(db_name,coll_name,scheme[0]['value'])
print("number of docs",len(data))
%%time
# Same load from Warp10 (different collection name).
coll_name='SmartGridCryolite20190101OneMonthBS1000'
data = warp10_find_all_data(db_name,coll_name,scheme[0]['value'])
print("number of docs",len(data))
%%time
# Each load cell above rebinds `data`, so this validates whichever result
# was assigned last; invalid items are removed from `data` in place.
clean_data(scheme[0]['value'],data)
%%time
# Build a DataFrame from the loaded records and preview the first five rows.
df = pd.DataFrame(data)
df[0:5]
df.tagname.unique()
# Replace the timestamp strings with seconds elapsed since 2019-01-01 00:00:00.
df['timestamp'] = df['timestamp'].apply(str_to_timestamp)
# Whole days since the reference date, and the hour within that day (0-23).
df['day-id'] = df['timestamp']//(3600*24)
df['hour-id'] = df['timestamp'] % (3600*24) // 3600
df['hour-id'].unique()
# Ensure numeric dtypes for the columns used in the hourly aggregation.
df['value'] = pd.to_numeric(df['value'])
df['hour-id'] = pd.to_numeric(df['hour-id'])
import matplotlib.colors as mcolors
# Names of all entries in matplotlib's CSS4_COLORS mapping, in mapping order;
# the plotting code indexes this list by day.
color_names = list(mcolors.CSS4_COLORS)
def compute_curve(df,day_id,tagname):
    """Return the hourly mean of 'value' for one day and one tag.

    Rows of `df` whose 'day-id' equals `day_id` and whose 'tagname' equals
    `tagname` are grouped by 'hour-id' and averaged.

    Returns:
        A pair (x, y) of Series: the 'hour-id' values in ascending order
        and the corresponding mean 'value'.
    """
    selected = (df['day-id'] == day_id) & (df['tagname'] == tagname)
    hourly = df.loc[selected, ['hour-id', 'value']]
    hourly_mean = hourly.groupby('hour-id').mean().reset_index()
    hourly_mean = hourly_mean.sort_values(by='hour-id')
    return hourly_mean['hour-id'], hourly_mean['value']
def plot_curve(df,day_id,tagname,color_names,fig):
    """Add the hourly-mean curve of one day and one tag to `fig`.

    The curve comes from `compute_curve(df, day_id, tagname)` and is added
    as a `go.Scatter` trace named "Value<tagname>Day<day_id>". The figure
    title is (re)set on every call.

    Parameters:
        df: DataFrame passed through to `compute_curve`.
        day_id: integer day identifier; also selects the line colour.
        tagname: tag to plot.
        color_names: non-empty sequence of colour names.
        fig: figure to which the trace is added; mutated in place.

    Returns:
        None.
    """
    x, y = compute_curve(df, day_id, tagname)
    # Wrap around the palette so a day_id at or beyond its length reuses a
    # colour instead of raising IndexError; in-range ids pick the same
    # colour as before.
    line_color = color_names[day_id % len(color_names)]
    fig.add_trace(go.Scatter(x=x, y=y, name="Value"+tagname+"Day"+str(day_id),
                             line_color=line_color,
                             opacity=0.8))
    fig.update_layout(title_text="day smartgrid data series")
%%time
# Plot the hourly-mean curve of a single day (day 10) for one tag.
day_id = 10
tagname = 'CRY.CENTRALE_SOLAIRE.CRY_act_prod_pow'
fig = go.Figure()
plot_curve(df,day_id,tagname,color_names,fig)
fig.show()
%%time
fig = go.Figure()
for day_id in range(30):
plot_curve(df,day_id,tagname,color_names,fig)
fig.show()
# Fetch the 'WindProp' scheme; scheme[0]['value'] is passed to the loaders below.
scheme = get_collection_scheme(db_name,'WindProp')
scheme[0]['value']
%%time
# Load every document of the collection from MongoDB.
coll_name='WindPropLacqOneDayP25B1000test'
data = mongodb_find_all_data(db_name,coll_name,scheme[0]['value'])
print("number of docs",len(data))
%%time
# Same load from KairosDB (different collection name).
coll_name='WindPropLacqOneDayP25BS100'
data = kairosdb_find_all_data(db_name,coll_name,scheme[0]['value'])
print("number of docs",len(data))
%%time
# Same load from InfluxDB (different collection name).
coll_name='WindPropLacqOneDayP25BS100c'
data = influxdb_find_all_data(db_name,coll_name,scheme[0]['value'])
print("number of docs",len(data))
%%time
# Same load from Warp10 (different collection name).
# NOTE(review): the count is taken from data[0][0]['v'], so the Warp10 result
# is presumably nested differently from the other backends -- confirm before
# feeding it to the DataFrame code below.
coll_name='WindPropLacqOneDayP25BS1000test'
data = warp10_find_all_data(db_name,coll_name,scheme[0]['value'])
print("number of docs",len(data[0][0]['v']))
%%time
# Build a DataFrame from the loaded records and preview the first five rows.
df = pd.DataFrame(data)
df[0:5]
# Keep the original 'Heure' column for use as the x axis of the plots,
# before the column is replaced by its millisecond conversion.
axisX = df['Heure']
df['Heure'] = df['Heure'].apply(str_to_unix)
df[0:5]
# Convert every column except the first to numeric, in place.
type_convert(df)
df['MDA Wnd Speed'].describe()
df['CavityTemp'].describe()
df['CavityPressure'].describe()
%matplotlib inline
# Plot the first nb_points samples of two wind-speed columns on one figure.
nb_points=7200
x=axisX[0:nb_points]
fig = go.Figure()
fig.add_trace(go.Scatter(
x=x,
y=[item for item in df["MWD Wind Speed"][0:nb_points]],
name="MWD Wind Speed",
line_color='deepskyblue',
opacity=0.8))
fig.add_trace(go.Scatter(
x=x,
y=[item for item in df["MDA Wnd Speed"][0:nb_points]],
name="MDA Wnd Speed",
line_color='dimgray',
opacity=0.8))
# Set the x-axis range from the first and last plotted 'Heure' values.
# NOTE(review): x[0] and x[nb_points-1] are label lookups on the Series;
# they presumably rely on a default integer index with at least nb_points
# rows -- confirm.
fig.update_layout(xaxis_range=[x[0],
x[nb_points-1]],
title_text="éolienne data series")
fig.show()
# Also send the figure to a visdom server, in the window named "mywin3".
vis = visdom.Visdom()
vis.plotlyplot(fig, win="mywin3")
# Three stacked subplots sharing the x axis: CavityTemp, MWD Wind Speed and
# Temperature1, each limited to the first nb_points samples.
fig = make_subplots(
rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.02
)
fig.add_trace(go.Scatter(
x=x,
y=[item for item in df["CavityTemp"][0:nb_points]],
name="CavityTemp",
line_color='deepskyblue',
line_width = 2,
opacity=0.8),
row=1, col=1)
fig.add_trace(go.Scatter(
x=x,
y=[item for item in df["MWD Wind Speed"][0:nb_points]],
name="MWD Wind Speed",
line_color='dimgray',
opacity=0.8),
row=2, col=1)
fig.add_trace(go.Scatter(
x=x,
y=[item for item in df["Temperature1"][0:nb_points]],
name="Temperature1",
line_color='rgb(49,130,189)',
line_width = 1.2,
opacity=0.8),
row=3, col=1)
fig.update_layout(height=1000, width=1000,
title_text="éolienne data series")
fig.show()
# Sent to the visdom window named "mywin3", the same window name used for
# the two-trace wind-speed figure.
vis = visdom.Visdom()
vis.plotlyplot(fig, win="mywin3")
from sklearn.linear_model import LinearRegression
# Remove the linear trend from 'MDA Wnd Speed': fit value ~ 'Heure' with a
# straight line and subtract the fitted values from the original series.
temp_ori = df['MDA Wnd Speed']
# scikit-learn expects 2-D arrays, hence the single-column reshape.
lr_x=df['Heure'].values.reshape(-1, 1)
lr_y=df['MDA Wnd Speed'].values.reshape(-1, 1)
lrModel = LinearRegression()
lrModel.fit(lr_x,lr_y)
lrModel.score(lr_x,lr_y)
alpha = lrModel.intercept_[0]
alpha
beta = lrModel.coef_[0][0]
beta
# Fitted line evaluated at every sample; shape (n_samples, 1).
pred = alpha + beta*lr_x
pred
# Flatten to 1-D so it can be subtracted from the Series. reshape(-1) infers
# the length, whereas a hard-coded row count only works for one dataset size.
res = pred.reshape(-1)
res
wspeed_p = temp_ori - res
wspeed_p
# Plot the detrended wind-speed series.
fig, ax = plt.subplots(1, 1, figsize=(6, 3))
wspeed_p.plot(ax=ax, lw=.5)
#ax.set_ylim(1.99, 2.02)
ax.set_xlabel('Date')
ax.set_ylabel('wspeed after treatement')
import scipy as sp
import scipy.fftpack
# Discrete Fourier transform of the detrended series.
wspeed_p_fft = sp.fftpack.fft(wspeed_p.values)
# Compute the power spectral density (squared magnitude of the FFT).
wspeed_p_psd = np.abs(wspeed_p_fft) ** 2
# NOTE(review): by operator precedence `1. / 365*24*3600` evaluates as
# (1/365)*24*3600 (about 236.7), not 1/(365*24*3600); confirm the intended
# sample spacing, since the axis label and the cut-off below depend on it.
fftfreq = sp.fftpack.fftfreq(len(wspeed_p_psd), 1. / 365*24*3600)
# Keep only the positive frequencies for the PSD plot.
i = fftfreq > 0
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
ax.plot(fftfreq[i], 10 * np.log10(wspeed_p_psd[i]))
ax.set_xlabel('Frequency (1/sec)')
ax.set_ylabel('PSD (dB)')
# Extract the dominant frequencies (truncated FFT): zero every bin whose
# absolute frequency exceeds 0.00001.
wspeed_p_fft_bis = wspeed_p_fft.copy()
wspeed_p_fft_bis[np.abs(fftfreq) > 0.00001] = 0
# Rebuild the time-domain curve as the inverse FFT of the truncated spectrum
# (the original comment said "temperature"; the series here is wind speed).
wspeed_p_slow = np.real(sp.fftpack.ifft(wspeed_p_fft_bis))
# Overlay the low-frequency reconstruction on the detrended series.
fig, ax = plt.subplots(1, 1, figsize=(6, 3))
ax.plot(wspeed_p_slow, '-')
wspeed_p.plot(ax=ax, lw=.5)
ax.set_xlabel('Date')
ax.set_ylabel('Wind Speed')